#!/bin/bash
#
# Runs a map-only Hadoop Streaming job that reads the text files under
# $input_dir and rewrites them under $output_dir using
# SequenceFileOutputFormat. The mapper is /bin/cat, so records are passed
# through unchanged; only the on-disk format differs.
#
# Usage: run with no arguments. All paths are hard-coded below.

# Trace every command as it is executed.
set -x

# Hadoop launcher and the streaming jar shipped with this installation.
hadoop_exe=/home/a/libexec64/hadoop/current/bin/hadoop
hadoop_streaming_jar=/home/a/libexec64/hadoop/current/contrib/streaming/hadoop-streaming-0.20.2-cdh3u0.jar

# HDFS source (text) and destination (sequence file) directories.
input_dir=/platform/output/1/bucket0_201203221039.bak
output_dir=/test/bucket0_201203221039_seq

# Remove any previous output so the job starts from a clean path.
# The exit status is intentionally not checked: the script proceeds to the
# job regardless of whether the removal succeeded.
"$hadoop_exe" fs -rmr "$output_dir"

# Job arguments are collected in an array so that each option can be
# commented and no trailing line-continuation can swallow a following line.
# Generic (-D) options come first, followed by the streaming options.
streaming_args=(
  # Map-only job: no reduce phase.
  -D mapred.reduce.tasks=0
  -D "mapred.job.name=Convert Format"
  # NOTE(review): these two settings control compression of the map output;
  # with zero reducers they may not affect the final files. Confirm whether
  # mapred.output.compress / mapred.output.compression.codec was intended.
  -D mapred.compress.map.output=true
  -D mapred.map.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec
  -input "$input_dir"
  -output "$output_dir"
  -inputformat org.apache.hadoop.mapred.TextInputFormat
  -outputformat org.apache.hadoop.mapred.SequenceFileOutputFormat
  # Identity mapper: each input record is emitted unchanged.
  -mapper /bin/cat
)

# The job is the last command, so its exit status is the script's status.
"$hadoop_exe" jar "$hadoop_streaming_jar" "${streaming_args[@]}"

# Disabled alternatives kept for reference (presumably from earlier
# experiments; verify before re-enabling):
#   -reducer /bin/cat
#   /init/init_20120312/match/*
#   org.apache.hadoop.mapred.SequenceFileAsTextInputFormat
#   -D mapred.max.split.size=500000000
#   -D mapred.min.split.size=500000000

